# -*- encoding: utf-8 -*-
'''
@File    :   8.py
@Time    :   2022/11/09 23:11:39
@Author  :   Cms 
@Version :   1.0
@Contact :   2267000140@qq.com
@WebSite    :   www.baidu.com
'''

# here put the import lib
import re
from collections import Counter
from pathlib import Path


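'''
8. Two files each hold an English-language computer-technology article (for example, two English articles about Python file-handling techniques). Read both articles, count word frequencies, and write each article's counts to a separate output file.
    Then compare the similarity of the two articles: if 5 of the 10 most frequent words are shared, the similarity is 50%; if 6 are shared, it is 60%; and so on.
'''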

# Number of most-frequent words compared between the two articles.
TOP_N = 10

# A word is a run of ASCII letters, optionally joined by apostrophes
# (straight or curly, e.g. "don't"); everything else is a separator.
_WORD_RE = re.compile(r"[a-z]+(?:['’][a-z]+)*")


def count_words(text: str) -> Counter:
    """Return a Counter mapping each word in *text* to its number of occurrences.

    The text is lower-cased first, and words are extracted with a regular
    expression rather than ``split(" ")`` so that newlines, tabs, repeated
    spaces and punctuation never end up inside (or as) a counted token.
    """
    return Counter(_WORD_RE.findall(text.lower()))


def read_word_counts(path: Path) -> Counter:
    """Read the text file at *path* and return its word counts.

    The file is decoded as UTF-8 regardless of the platform locale;
    undecodable bytes are replaced instead of aborting the whole run.
    """
    with open(path, encoding="utf-8", errors="replace") as f:
        return count_words(f.read())


def top_words(counts: Counter, n: int = TOP_N) -> list:
    """Return up to *n* words from *counts*, most frequent first.

    Words with equal counts keep the order in which they were first seen.
    """
    return [word for word, _ in counts.most_common(n)]


def similarity(counts_a: Counter, counts_b: Counter, n: int = TOP_N) -> tuple:
    """Compare the *n* most frequent words of two articles.

    Returns:
        A ``(shared, ratio)`` tuple: ``shared`` is how many of the top-*n*
        words of ``counts_a`` also appear in the top-*n* of ``counts_b``,
        and ``ratio`` is ``shared / n`` (5 shared out of 10 gives 0.5).

    Raises:
        ValueError: if *n* is not positive.
    """
    if n <= 0:
        raise ValueError("n must be a positive integer")
    top_b = set(top_words(counts_b, n))
    shared = sum(1 for word in top_words(counts_a, n) if word in top_b)
    # Divide by n, not by the number of distinct words found, so the score
    # stays "k out of n" even when an article has fewer than n words.
    return shared, shared / n


def main() -> None:
    """Count words in both articles and print their top-word similarity."""
    # NOTE: the assignment text in this file's header also asks for each
    # article's counts to be written to a separate output file; that step
    # is not implemented here.
    base = Path("homework3")
    dict1 = read_word_counts(base / "paper1.txt")
    dict2 = read_word_counts(base / "paper2.txt")

    num, ratio = similarity(dict1, dict2, TOP_N)
    print(num)

    print('相似度: {:.0%}'.format(ratio))


if __name__ == "__main__":
    main()

